*----------------------------------------------------------------------
* Session setup: start from an empty dataset and close any open log
*----------------------------------------------------------------------
drop _all
set more 1
set matsize 800
capture log close

*----------------------------------------------------------------------
* Load the SI firm panel
* NOTE(review): this is the only path in the script with a backslash
* between the directory global and the file name; every other path
* appends the file name directly. Confirm which convention is intended.
*----------------------------------------------------------------------
use "${mastersi}\si_panel.dta" 

*----------------------------------------------------------------------
* Attach yearly temperature data (NASA POWER) by kabucode and year
* NOTE(review): the variable converted to numeric is kabucode_1990,
* while the merge key is kabucode - confirm this is intended.
*----------------------------------------------------------------------
sort kabucode year
destring kabucode_1990, replace
merge m:1 kabucode year using "${masterweather}temp_yearly.dta"

* Retain only observations found in both the panel and the weather file
keep if _merge == 3
drop _merge

*H. DEFLATE VALUE ADDED
* Bring in the yearly GDP deflator and rescale value_added by gdpdef/100.
* Unmatched rows from either side are kept, as no filter on the merge
* result is applied here.
sort year
merge m:1 year using "${otherdata}gdpdeflator.dta", nogenerate
replace value_added = value_added / (gdpdef / 100)

* Keep only observations that report both wages and value added
drop if wages == . | value_added == .

*A. DROP UNLIKELY LARGE SPIKES
*(1) Growth in value added relative to the plant's previous observation;
*    drop growth rates of 20 (i.e. 2000 percent) or more
bysort PSID (year): generate gr_va = (value_added - value_added[_n-1]) / value_added[_n-1]
drop if gr_va >= 20 & gr_va != .

*(2) Growth in employment relative to the plant's previous observation;
*    drop growth rates of 10 (i.e. 1000 percent) or more
bysort PSID (year): generate gr_em = (num_workers - num_workers[_n-1]) / num_workers[_n-1]
drop if gr_em >= 10 & gr_em != .

*B. DROP OBSERVATIONS WITH MISREPORTING
* A plant reporting zero workers or zero wages cannot at the same time
* report non-zero, non-missing value added, output or electricity use.
* Each rule is a row-wise filter, so the order of the drops is irrelevant.
foreach zeroinput of varlist num_workers wages {
	foreach activity of varlist value_added output eltrc_PLN {
		drop if `zeroinput' == 0 & `activity' != 0 & `activity' != .
	}
}

*C. DROP UNREALISTICALLY HIGH AND LOW LABOR INTENSITIES
* Labor intensity is the wage bill as a share of value added
generate firmlabint = wages / value_added
drop if firmlabint <= 0.005 | firmlabint >= 2

*D. DROP OBSERVATIONS WITH NEGATIVE CAPITAL
drop if cap_est < 0

*F1. CLEAN UP THE START YEARS
* First year each plant appears in the full cleaned panel
bysort PSID: egen birthinsample = min(year)

* Restrict the panel to 2001 onwards
keep if year >= 2001

* First year each plant appears within the restricted (2001+) panel
bysort PSID: egen firstyearinsample_2001 = min(year)

*FIX EXPORT VALUES
* Bring expShare onto a 0-1 scale:
*   values in (1, 100] are divided by 100,
*   anything still above 1 afterwards is divided by 1000.
replace expShare = expShare / 100 if expShare > 1 & expShare <= 100
replace expShare = expShare / 1000 if expShare > 1 & expShare < .

* Recode exporter to a 0/1 indicator:
*   1.5 and above (which includes the code 2) becomes 0,
*   values above 0.75 become 1,
*   values below 0.75 become 0.
* Missing values are left untouched.
replace exporter = 0 if exporter >= 1.5 & exporter < .
replace exporter = 1 if exporter > 0.75 & exporter < .
replace exporter = 0 if exporter < 0.75

* Non-exporters have a zero export share
replace expShare = 0 if exporter == 0
	
* Declare the plant-year panel, order the rows and store the cleaned sample
xtset PSID year
sort PSID year

save "${mastersi}si_combined0112.dta", replace

***********************************************************************
***********************************************************************
*Construct within-industry productivity terciles
* Output: one row per prod2 with the across-year average of the upper
* bounds of the first and second value-added-per-worker terciles.
clear
use "${mastersi}si_combined0112.dta"

* Value added per worker
gen va_pwk = value_added/num_workers

*Generate firm-specific time-invariant productivity indicators
keep PSID year va_pwk prod3

bysort PSID: egen meanprod3 = mean(prod3)
*Drop the two plants that switched prod3 categories
drop if meanprod3 != prod3

* The stub is written out in full (va_pwk) so the reshape does not depend
* on variable-name abbreviation being enabled; the wide variables are
* va_pwk2001 ... va_pwk2012, as the loop below expects.
reshape wide va_pwk, i(PSID) j(year)

* Industry group: prod3 with its last digit removed
gen prod2 = floor(prod3/10)

*STEP 1: CONSTRUCT AVERAGE OF TERCILE CUTOFFS
* For each year: assign terciles within prod2, then record the largest
* va_pwk observed in tercile 1 and in tercile 2 of each prod2.
* (egen xtile() is not an official egen function; it must be installed.)
forvalues i = 2001(1)2012 {
	egen Bin3Pdtyprod2_`i' = xtile(va_pwk`i'), by(prod2) nq(3)
	bys prod2: egen Bin1cutoffmax`i' = max(va_pwk`i') if Bin3Pdtyprod2_`i'== 1
	bys prod2: egen Bin2cutoffmax`i' = max(va_pwk`i') if Bin3Pdtyprod2_`i'== 2
}
* Arrange the 2002 variables first for visual inspection
order va_pwk2002 prod2 Bin3Pdtyprod2_2002 Bin1cutoffmax2002 Bin2cutoffmax2002
sort prod2 Bin3Pdtyprod2_2002 va_pwk2002

* Collapse to one row per prod2 (mean over the non-missing cutoff values)
collapse Bin1cutoffmax* Bin2cutoffmax*, by(prod2)
order Bin1cutoffmax2002 Bin2cutoffmax2002

* Average the yearly cutoffs across years
egen Bin1cutoffmax_mean = rowmean(Bin1cutoffmax*)
egen Bin2cutoffmax_mean = rowmean(Bin2cutoffmax*)

keep prod2 Bin1cutoffmax_mean Bin2cutoffmax_mean
sort prod2

save "${mastersi}si_tercilecutoffs_0112.dta", replace

***********************************************************************
***********************************************************************
* Assign each plant to a productivity tercile (Bin3ter) using its value
* added per worker in its first sample year (2001 onwards) and the
* average within-prod2 tercile cutoffs saved in si_tercilecutoffs_0112.dta.
clear

use "${mastersi}si_combined0112.dta"

* Value added per worker
gen va_pwk = value_added/num_workers

*Generate firm-specific time-invariant productivity indicators

keep PSID year va_pwk exporter prod3 firstyearinsample_2001

bysort PSID: egen meanprod3 = mean(prod3)
*Drop the two plants that switched prod3 categories
drop if meanprod3 != prod3

* Stubs are written out in full so the reshape does not depend on
* variable-name abbreviation being enabled.
reshape wide va_pwk exporter, i(PSID) j(year)

* Industry group: prod3 with its last digit removed
gen prod2 = floor(prod3/10)

* Pick up productivity and exporter status from the plant's first year
gen va_pwkinitial = .
gen exporter_initial = .

forvalues i = 2001(1)2012 {
	replace va_pwkinitial    = va_pwk`i'   if `i' == firstyearinsample_2001
	replace exporter_initial = exporter`i' if `i' == firstyearinsample_2001
}

keep PSID va_pwkinitial exporter_initial prod2

sort prod2

merge m:1 prod2 using "${mastersi}si_tercilecutoffs_0112.dta"
drop _merge

* Tercile assignment.
* Stata treats missing as larger than any number, so an unguarded
* "x <= cutoff" is true whenever the cutoff is missing. The explicit
* missing() guards leave Bin3ter missing when the cutoff needed for the
* comparison (or the plant's initial productivity) is unavailable.
gen Bin3ter = .
replace Bin3ter = 1 if va_pwkinitial <= Bin1cutoffmax_mean & !missing(Bin1cutoffmax_mean)
replace Bin3ter = 2 if va_pwkinitial > Bin1cutoffmax_mean & va_pwkinitial <= Bin2cutoffmax_mean & !missing(Bin2cutoffmax_mean)
replace Bin3ter = 3 if va_pwkinitial > Bin2cutoffmax_mean & !missing(va_pwkinitial)

* The cutoffs themselves are not needed downstream
drop Bin1cutoffmax_mean Bin2cutoffmax_mean
sort PSID

save "${mastersi}Bin3ter_combined0112.dta", replace

***********************************************************************
***********************************************************************
*Assemble data for combined margin analysis
use "${mastersi}si_combined0112.dta", clear

*Merge with the plant-level productivity bins (Bin3ter) built from
*initial-year value added per worker
sort PSID
merge m:1 PSID using "${mastersi}Bin3ter_combined0112.dta"

* Keep matched plants only; prod2 arrives with the bin file and is dropped
keep if _merge == 3
drop _merge prod2

* Log outcomes. ln() of a non-positive argument yields missing.
generate lvladded       = ln(value_added)
generate lflabd         = ln(num_workers)
generate labor_nonprod  = num_workers - laborCnt_prod
generate lmp_ratio      = ln(labor_nonprod / laborCnt_prod)
generate lCaptoLabor    = ln(cap_est / num_workers)
generate llabor_nonprod = ln(labor_nonprod)

save "${mastersi}si_analysis_combined0112.dta", replace

***********************************************************************
***********************************************************************
***********************************************************************
*Productivity estimates for intensive margin analysis
* Estimates a value-added production function separately for each prod2
* group with prodest, averages each plant's residual over years, and
* assigns within-prod2 terciles of that average.
* NOTE(review): the original header called these "LP" estimates, but the
* prodest call below requests met(wrdg) - confirm the intended label.

use "${mastersi}si_combined0112.dta", clear

xtset PSID year

* Industry group: prod3 with its last digit removed
gen prod2 = floor(prod3/10)

drop prod3

su value_added wages raw_mat cap_est expenses_elect

* Logs of output and inputs (non-positive values become missing)
foreach var of varlist value_added wages raw_mat cap_est expenses_elect {
	gen log_`var' = log(`var')
}

gen tfp_wrdg_raw1 = .

forval i = 31/39 {

	prodest log_value_added if prod2 == `i', free(log_wages) state(log_cap_est) ///
		proxy(log_raw_mat) va met(wrdg) poly(2) id(PSID) t(year)

	predict tfp_wrdg_raw, residuals

	* Store residuals only for plants in the industry just estimated.
	* Filling every still-missing row instead would let the first
	* industry's coefficients be applied to all other industries
	* whenever predict returns values outside the estimation sample.
	replace tfp_wrdg_raw1 = tfp_wrdg_raw if prod2 == `i'

	drop tfp_wrdg_raw
}

* This plant is removed by hand before reshaping.
* NOTE(review): the original computed a within-plant minimum of prod2 and
* a flag for rows deviating from it just before this drop, then discarded
* both unused; presumably this plant changes prod2 over time - confirm.
drop if PSID == 68086

keep PSID tfp_wrdg_raw1 year prod2

reshape wide tfp_wrdg_raw1, i(PSID) j(year)

* Plant-level productivity: mean of the yearly residuals
egen pdty_wrdg = rowmean(tfp_wrdg*)

* Within-prod2 terciles of average productivity
* (egen xtile() is not an official egen function; it must be installed.)
egen BinPdty3_initialprod2_wrdg = xtile(pdty_wrdg), by(prod2) nq(3)

label var BinPdty3_initialprod2_wrdg "prod2 xtile(3) based on pdty_wrdg"

* Drop the yearly residual columns; pdty_wrdg and the bin are kept
drop tfp*

sort PSID 

save "${interpdty}pdtybin_wrdg_combine0112.dta", replace

***********************************************************************
***********************************************************************
*Assemble the analysis file that uses the prodest (wrdg) productivity bins
use "${mastersi}si_combined0112.dta", clear

*Merge with plant-level average tfp estimated using control function approach
sort PSID
merge m:1 PSID using "${interpdty}pdtybin_wrdg_combine0112.dta"

* Keep matched plants only; prod2 arrives with the bin file and is dropped
keep if _merge == 3
drop _merge prod2

* Log outcomes. ln() of a non-positive argument yields missing.
generate lvladded       = ln(value_added)
generate lflabd         = ln(num_workers)
generate labor_nonprod  = num_workers - laborCnt_prod
generate lmp_ratio      = ln(labor_nonprod / laborCnt_prod)
generate lCaptoLabor    = ln(cap_est / num_workers)
generate llabor_nonprod = ln(labor_nonprod)

save "${mastersi}si_analysis_combined0112_wrdg.dta", replace
